// How to launch this do-file from a shell:
//   cd /projects/hsieh_project/proj_201809/code_2_202011/
//   qstata sv_weights.do &

* ---- Session: wide log lines, fresh text log, empty workspace ----
set linesize 255
capture log close
log using /projects/hsieh_project/proj_201809/code_2_202011/sv_weights_log, replace text

clear all
cd /projects/

* Write the current time and date to the log.
display "$S_TIME $S_DATE"

* Today's date formatted as YYMMDD, stored in a global and echoed.
global rev_date: display %tdYYNNDD date("$S_DATE", "DMY")
display "${rev_date}"

* ---- Directory layout (globals) ----
global dir_proj "/projects/hsieh_project/proj_201809/"

global dir_do "${dir_proj}/code_2_202011/"
global dir_data "${dir_proj}/data/"
global dir_out "${dir_proj}/output/202011_main/"
* mkdir errors (e.g. directory already exists) are swallowed by capture.
noisily capture mkdir ${dir_out}
global dir_outf "${dir_out}/sv_weights/"
noisily capture mkdir ${dir_outf}
noisily capture mkdir ${dir_outf}/data/

* ---- Default endpoint years; overwritten further down before each run ----
global year1 1977
global year2 2013

* Percentile cut-offs: the global is looped over inside sv_weight.
* NOTE(review): the local copy l_perc is not referenced in the visible code.
global gl_perc "10 1"
local l_perc "10 1"

* Shared helper do-file (its contents are not part of this file).
do "/projects/hsieh_project/code_0_general/f_rounding.do"

* Industry-level input dataset.
global ds_ind "${dir_data}/ind_sum_all"

* Temporary file name, exposed through a global so later sections can use it.
tempfile ds_temp
global ds_temp "`ds_temp'"

/* ------------------------------------------------------------------------------
Author: Adarsh Kumar
Objective: Generate the different sets of SV weights used in downstream regressions and calculations.
*/

/* sv_weight <indvar>

   Replaces the data in memory with one row per value of <indvar>, holding
   log-mean employment weights between the two years given by the globals
   $year1 and $year2.

   Reads
     variables : year, <indvar>, emp_ind, n_ind
     globals   : $year1, $year2 (endpoint years), $gl_perc (list of percentages)

   Leaves in memory (one row per <indvar>)
     emp_ind_y1   first emp_ind value in (<indvar>, year) order after the
                  base-year sign flip, i.e. minus the $year1 employment when a
                  $year1 row exists
     emp_ind      emp_ind($year2) - emp_ind($year1)
     ln_emp_ind   ln emp_ind($year2) - ln emp_ind($year1)
     min_n_nind   minimum of n_ind over the two years
     w_num        emp_ind / ln_emp_ind; equals the common employment level when
                  the two years have identical employment; missing when either
                  endpoint year is absent or has zero/missing employment
     w_den, w_sv  total of w_num and the normalised weight w_num / w_den
     w_num_P, w_den_P, w_sv_P
                  same, for each P in $gl_perc, after blanking industries with
                  min_n_nind < 100 / P
     year1, year2 the endpoint years used
*/
capture program drop sv_weight
program sv_weight

	args ind

	* Restrict to the two endpoint years and to the variables used below.
	keep if inlist(year,$year1,$year2)
	keep year `ind' emp_ind n_ind

	gen ln_emp_ind = ln(emp_ind)

	* Per-year flags for a usable endpoint: the row exists and ln(emp_ind) is
	* defined. collapse (sum) silently skips missing values, so without these
	* flags an industry seen in only one year (or with zero employment in one
	* year) would receive the meaningless weight emp/ln(emp) and contaminate
	* w_den for every other industry.
	tempvar has_y1 has_y2
	gen byte `has_y1' = (year == ${year1}) & !missing(ln_emp_ind)
	gen byte `has_y2' = (year == ${year2}) & !missing(ln_emp_ind)

	* Flip the sign of base-year values so that (sum) returns year2 - year1.
	replace emp_ind = - emp_ind if year == ${year1}
	replace ln_emp_ind = - ln_emp_ind if year == ${year1}

	* (first) relies on rows being ordered by year within industry.
	sort `ind' year
	collapse (first) emp_ind_y1=emp_ind (sum) emp_ind ln_emp_ind ///
		(max) `has_y1' `has_y2' (min) min_n_nind = n_ind, by(`ind')

	gen w_num = emp_ind / ln_emp_ind
	* Identical employment in both years gives 0/0; use the common level.
	replace w_num = -emp_ind_y1 if emp_ind == 0
	* No valid weight unless both endpoints are observed with positive employment.
	replace w_num = . if !(`has_y1' & `has_y2')

	egen w_den = total(w_num)
	gen w_sv = w_num / w_den

	*Adjust weights for percs:
	foreach perc in $gl_perc{
		di "`perc'"
		gen w_num_`perc' = w_num
		* Blank industries whose smallest n_ind is below 100 / perc.
		replace w_num_`perc' = . if min_n_nind < (100 / `perc')

		egen w_den_`perc' = total(w_num_`perc')
		gen w_sv_`perc' = w_num_`perc' / w_den_`perc'
	}

	gen year1 = $year1
	gen year2 = $year2

end

* ch_ind is the industry identifier passed to sv_weight for every weight set.
local indus "ch_ind"

* "Standard" SV weight: base year 1977, end year 2013.
global year1 1977
global year2 2013
use ${ds_ind}, clear
sv_weight `indus'
save ${dir_outf}/sv_weights_1977_2013.dta, replace


*5-yearly SV weights with base 1977:
* One sv_weight run per end year, stacked into a single dataset. The year1 and
* year2 columns written by sv_weight identify the run each row belongs to.

* Accumulate in a registered tempfile so Stata erases it when the do-file ends.
* A name derived from another tempfile (e.g. with a suffix) is not tracked and
* would be left behind in the temp directory.
tempfile sv_acc_base77
clear
save "`sv_acc_base77'", replace emptyok

foreach iyear in 1982 1987 1992 1997 2002 2007 2013 {
	global year1 = 1977
	global year2 = `iyear'
	
	use ${ds_ind}, clear
	sv_weight `indus'
	
	* Newest run first, followed by everything accumulated so far.
	append using "`sv_acc_base77'"
	save "`sv_acc_base77'", replace 
}

use "`sv_acc_base77'", clear 
save ${dir_outf}/sv_weights_5years.dta, replace

*5-yearly SV weights with base iyear (for sales share of top firms time series): 
* One sv_weight run per consecutive window, stacked into a single dataset. The
* year1 and year2 columns written by sv_weight identify the window of each row.

* Accumulate in a registered tempfile so Stata erases it when the do-file ends.
* A name derived from another tempfile (e.g. with a suffix) is not tracked and
* would be left behind in the temp directory.
tempfile sv_acc_roll
clear
save "`sv_acc_roll'", replace emptyok

foreach iyear in 1977 1982 1987 1992 1997 2002 2007 {
	global year1 = `iyear'
	global year2 = `iyear' + 5
	if `iyear' == 2007 {
		*Adjust to get 2013-2007 instead of 2012-2007
		global year2 = 2013
	}
	di "$year1"
	di "$year2"
	
	use ${ds_ind}, clear
	sv_weight `indus'
	
	* Newest window first, followed by everything accumulated so far.
	append using "`sv_acc_roll'"
	save "`sv_acc_roll'", replace 
}

use "`sv_acc_roll'", clear 
save ${dir_outf}/sv_weights_5years_base_iyear.dta, replace
	
